/*******************************************************************************																	

	DESCRIPTION: 	This do file generates descriptive statistics and scatterplots 
					about individual predictor weights in the ensemble.
					
*******************************************************************************/

* Reset the session and record the numeric id of this do-file; the id is
* used further down as a prefix for the exported figure's file name.
* NOTE: the globals data and output are not set in this file; they are
* expected to be defined by the caller before this script runs.
clear all
global id_code 140

* Import the combined ensemble weights:
use "${data}/004_Weights_combined.dta", clear

* Convert year to numeric so that it can be used as the x-axis variable.
* destring is only invoked when year is actually stored as a string.
capture confirm string variable year
if !_rc {
	destring year, replace
}

* destring leaves the variable untouched (without raising an error) when it
* contains non-numeric characters, so stop here with a clear message instead
* of failing later inside the plotting command.
confirm numeric variable year

* Human-readable variable labels (picked up by the graph legend and axes):
label var weight_rf    "Random Forest"
label var weight_boost "Gradient Boosting"
label var weight_lasso "LASSO"
label var year         "Year"

* Build a numeric panel variable from the string outcome codes.
* The codes are numbered 1, 2, 3 in the order listed; observations with any
* other outcome value keep a missing outcome_nice.
capture drop outcome_nice
gen outcome_nice = .
local code = 0
foreach spell in emplAft6M_0M_In emplAft6M_6M_In emplAft6M_12M_In {
	local ++code
	replace outcome_nice = `code' if outcome == "`spell'"
}

* Attach display names to the three codes (rebuilt from scratch on re-runs):
capture label drop outcome_nice
label define outcome_nice 1 "At Start of Spell"
label define outcome_nice 2 "6M into Spell", add
label define outcome_nice 3 "12M into Spell", add
label values outcome_nice outcome_nice


* Plot the ensemble weight of each learner over time, one panel per outcome.
*   - sort: connect the points in year order regardless of how the dataset
*     happens to be sorted (otherwise the lines follow row order).
*   - xline(2006): dashed vertical reference line at year 2006.
*   - by(): three panels plus one hole at grid position 2; the shared legend
*     is placed inside that empty cell.
twoway (connected weight_rf year, sort color("ebblue")) ///
	(connected weight_boost year, sort color("orange_red")) ///
	(connected weight_lasso year, sort color("61 11 55*0.8")) ///
	, ///
	xline(2006, lcolor(gray) lpattern(dash)) ///
	by(outcome_nice, graphregion(color(white)) note("") holes(2) legend(at(2) pos(0))) ///
	ytitle("Weight in Ensemble") ///
	ylabel(-0.25(0.25)1, angle(0) format(%5.2f)) ///
	xlabel(1992(4)2016, labsize(small)) ///
	legend(cols(1) size(small) symxsize(*0.5)) ///
	graphregion(color(white)) name(scatt1, replace)

* Export the named graph explicitly so the result does not depend on which
* graph window happens to be current:
graph export "${output}/${id_code}_Weights_Scatterplot.pdf", name(scatt1) replace
